Installation

In [1]:
import numpy as np
import pandas as pd
import folium
import json
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import plotly.plotly as py
import plotly.graph_objs as go
import plotly
plotly.tools.set_credentials_file(username='sl4430', api_key='qAZq4fHIKjJmfzniie8f')
import requests
from bs4 import BeautifulSoup
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import linear_model

Loading Data

Our group focuses on the Hong Kong data.

In [2]:
# Load the dining-transaction sheet and restrict to Hong Kong rows.
transaction = pd.read_excel("transaction.xlsx", sheet_name="Dining Transaction", header=0)
# Bug fix: take an explicit copy so later column assignments (e.g. the
# 'paid hour' feature) act on an independent frame instead of a view —
# this removes the SettingWithCopyWarning raised by downstream cells.
tra_HK = transaction[transaction['En Name (Place City)'] == 'Hong Kong'].copy()
tra_HK.head()  # 15200 rows × 25 columns
Out[2]:
Channel (Order Commonorder) En Name (FFP) En Name (Place City) En Name (Products Bizarea) En Name (Products Producer) En Name (Products Producerflavor) En Name (Promotion Voucher) Name (Place City) Name (Products Bizarea) Name (Products Producer) ... Time Paid (SECOND) Status User Id User Phone Region Miles (SUM) paycommission_current (SUM) Promotion Miles (SUM) Total Price (SUM) Voucher Miles (SUM) Transaction Country
0 2c2p All Nippon Airways/ANA Mileage Club Hong Kong Cheung Sha Wan Kuroiwa Ramen Ramen NaN 香港 长沙湾 面屋黑岩 ... 2018-09-04 17:41:25 C 229347 HK 113 28.25 339 113.0 0 HK
1 2c2p Fortune Wings Club Hong Kong Lai Chi Kok Watami (Lai Chi Kok The Pacifica Mall) Japanese-Style Bar NaN 香港 荔枝角 居食屋「和民」(荔枝角宇晴汇商场) ... 2018-12-27 14:20:52 C 230516 HK 17 5.54 34 69.3 0 HK
2 2c2p British Airways / Executive Club Hong Kong Kowloon Tong Amaroni’s New York Italian Restaurant & Cafe American Referral Bonus for Hong Kong 香港 九龙塘 Amaroni’s New York Italian Restaurant & Cafe ... 2018-08-18 18:25:41 C 359603 HK 206 37.13 618 412.5 1000 HK
3 2c2p British Airways / Executive Club Hong Kong Yuen Long Kanada-ya (Yuen Long) Ramen Referral Bonus for Hong Kong 香港 元朗 金田家(元朗) ... 2018-11-07 19:17:53 C 407072 HK 166 43.16 0 332.0 1000 HK
4 2c2p Singapore Airlines / KrisFlyer Hong Kong Wan Chai Sabah Malaysian Cuisine Malaysian cuisine DOUBLEELEVEN 香港 湾仔 莎巴马来西亚餐厅 ... 2018-11-11 20:59:42 C 253670 SG 101 30.30 0 202.0 111 HK

5 rows × 25 columns

In [3]:
# Summary statistics for the numeric columns of the HK transactions.
tra_HK.describe()
Out[3]:
Order Id Producer Id User Id Miles (SUM) paycommission_current (SUM) Promotion Miles (SUM) Total Price (SUM) Voucher Miles (SUM)
count 1.520000e+04 15200.000000 15200.000000 15200.000000 15200.000000 15200.000000 15200.000000 15200.000000
mean 5.622396e+09 5432.703487 330552.567697 197.367566 53.837153 181.209145 410.064482 258.371711
std 2.532539e+09 869.328019 90249.840283 327.670489 91.503121 619.614446 831.561309 401.385433
min 1.234821e+09 3650.000000 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000
25% 3.428611e+09 5092.000000 237946.000000 59.000000 16.500000 0.000000 117.000000 0.000000
50% 5.625720e+09 5383.500000 359666.500000 130.000000 36.100000 0.000000 286.000000 0.000000
75% 7.814863e+09 6105.500000 382950.000000 225.000000 61.050000 162.000000 460.000000 388.000000
max 9.998332e+09 7149.000000 485831.000000 15000.000000 3750.000000 45000.000000 36000.000000 1500.000000

Exploratory data analysis

1. Restaurants Grouped by Type/Flavor

In [4]:
# Use seaborn styling for all subsequent matplotlib figures.
plt.style.use('seaborn')
# Transaction count per restaurant flavor, most popular first.
Producerflavor= tra_HK.groupby('En Name (Products Producerflavor)').size().sort_values(ascending=False)
In [5]:
# Average transactions per restaurant, by cuisine flavor:
# total transactions for the flavor divided by the number of
# distinct restaurants serving it.
Producerflavor = pd.DataFrame(Producerflavor)
Producerflavor['Name'] = Producerflavor.index

def _restaurant_count(flavor):
    # Number of distinct producers offering this flavor.
    mask = tra_HK['En Name (Products Producerflavor)'] == flavor
    return len(tra_HK[mask]['En Name (Products Producer)'].unique())

Producerflavor['Number'] = Producerflavor['Name'].apply(_restaurant_count)
Producerflavor['Average'] = Producerflavor[0] / Producerflavor['Number']
Producerflavor = Producerflavor.sort_values('Average', ascending=False)

top5 = Producerflavor[:5]
bar_colors = ['darkred', 'red', 'salmon', 'darkorange', 'yellow']
trace0 = go.Bar(
    x=top5["Name"],
    y=top5['Average'],
    marker=dict(color=bar_colors,
                line=dict(color='rgb(8,48,107)', width=1.5)),
    opacity=0.6,
)

data = [trace0]
layout = go.Layout(title='Average Transaction for each Restaurant Flavor')
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='text-hover-bar')
/Users/liushujun/anaconda3/lib/python3.6/site-packages/IPython/core/display.py:689: UserWarning:

Consider using IPython.display.IFrame instead

Out[5]:

2. Transactions Grouping by Paid Day of Week and Paid Hour

In [6]:
import datetime

# Derive time-of-day and day-of-week features from the payment timestamp.
# The .dt accessor is vectorized, replacing the two per-row apply() calls.
# 'paid hour' stays a zero-padded string ('00'-'23'), matching strftime('%H');
# paid_day_of_week is the ISO weekday (Mon=1 .. Sun=7, same as isoweekday()).
tra_HK['paid hour'] = tra_HK['Time Paid (SECOND)'].dt.strftime('%H')
tra_HK['paid_day_of_week'] = tra_HK['Time Paid (SECOND)'].dt.weekday + 1
/Users/liushujun/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

/Users/liushujun/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:3: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [7]:
# Transaction counts by ISO weekday (1=Mon ... 7=Sun).
paid_day_of_week = tra_HK.groupby('paid_day_of_week').size()

# Line chart of counts over the week.
trace = go.Scatter(x=paid_day_of_week.index, y=paid_day_of_week)

data = [trace]

py.iplot(data, filename='Paid Day of Week')
/Users/liushujun/anaconda3/lib/python3.6/site-packages/IPython/core/display.py:689: UserWarning:

Consider using IPython.display.IFrame instead

Out[7]:
In [8]:
# Transaction counts by hour of day.
paid_hour = tra_HK.groupby('paid hour').size()
trace = go.Scatter(
    x = paid_hour.index,
    y = paid_hour
)
layout = go.Layout(
    title='Paid Hour',
)
data = [trace]
# Bug fix: the original passed bare `data` to py.iplot, so `layout` (and its
# 'Paid Hour' title) was built but never used. Wrap both in a Figure so the
# title actually renders.
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Paid Hour')
/Users/liushujun/anaconda3/lib/python3.6/site-packages/IPython/core/display.py:689: UserWarning:

Consider using IPython.display.IFrame instead

Out[8]:

3. Transactions Grouping by Campaigns

In [9]:
# Top 5 promotion campaigns ranked by number of transactions.
campaign_counts = tra_HK.groupby(['Name (Promotion Multiplepromotion)']).size()
campaign_counts = campaign_counts.sort_values(ascending=False)
Groupby_customer = campaign_counts  # keep the original global name for later cells
# print(tra_HK['Name (Promotion Multiplepromotion)'].unique())
data = [go.Bar(x=Groupby_customer.index[:5],
               y=Groupby_customer[:5])]

py.iplot(data, filename='Campaign')
/Users/liushujun/anaconda3/lib/python3.6/site-packages/IPython/core/display.py:689: UserWarning:

Consider using IPython.display.IFrame instead

Out[9]:

4. Restaurants with top customer loyalty

In [10]:
# "Loyalty" = for each restaurant, the mean number of visits per user among
# users who visited it at least once (unstack leaves NaN for non-visitors,
# which .mean() skips by default).
Groupby_customer = tra_HK.groupby(["User Id", "En Name (Products Producer)"]).size().unstack()
avg_loyalty = (
    pd.DataFrame(Groupby_customer.mean())
    .rename(columns={0: "Average Loyalty"})
    .sort_values(by="Average Loyalty", ascending=False)
    .head()
)

bar_colors = ['navy', 'blue', 'dodgerblue', 'skyblue', 'powderblue']
trace0 = go.Bar(
    x=avg_loyalty.index,
    y=avg_loyalty['Average Loyalty'],
    marker=dict(color=bar_colors,
                line=dict(color='rgb(8,48,107)', width=1.5)),
    opacity=0.6,
)


data = [trace0]
layout = go.Layout(title='Customer Loyalty')
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Customer Loyalty')
/Users/liushujun/anaconda3/lib/python3.6/site-packages/IPython/core/display.py:689: UserWarning:

Consider using IPython.display.IFrame instead

Out[10]:

5. Areas with top Transaction Frequencies

In [11]:
Groupby_Area = pd.DataFrame(tra_HK.groupby(["En Name (Products Bizarea)"]).size())
Groupby_Area = Groupby_Area.rename(columns={0:"Transaction Frequency"})
Groupby_Area['Bizarea'] = Groupby_Area.index

Web-Scraping

In [12]:
import os

# Security fix: the Google Places key was hardcoded (and therefore leaked).
# Read it from the environment instead; set GOOGLE_PLACES_API_KEY before running.
api_key = os.environ.get('GOOGLE_PLACES_API_KEY', '')

def get_location(query):
    """Geocode a free-text place query via the Google Places text search API.

    Parameters
    ----------
    query : str
        Free-text place name; spaces are URL-encoded as '+'.

    Returns
    -------
    dict or None
        The {'lat': ..., 'lng': ...} dict of the first match, or None when
        the request fails, the response is not JSON, or there are no results.
    """
    query = query.replace(' ', '+')
    url = ("https://maps.googleapis.com/maps/api/place/textsearch/json?"
           "query=" + query + "&key=" + api_key)
    try:
        # timeout so a dead network cannot hang the notebook indefinitely
        response = requests.get(url, timeout=10)
        response_data = response.json()
        data = response_data['results'][0]
        location = data['geometry']['location']
    except (requests.RequestException, ValueError, LookupError):
        # Narrowed from a bare except: network errors, non-JSON payloads, or
        # an empty/malformed result list all mean "no location found".
        location = None
    return location
In [13]:
# Groupby_Area['location']= Groupby_Area['Bizarea'].apply(lambda x: get_location(x, api_key))
# Groupby_Area = Groupby_Area.dropna()
# Groupby_Area.to_excel(r'./Groupby_Area.xlsx',columns=['Transaction Frequency','Bizarea','location'], index=False,encoding='utf-8')
In [14]:
# Load the geocoded area table (produced by the commented-out cell above)
# and the 18 Hong Kong district boundary polygons.
Area = pd.read_excel("Groupby_Area.xlsx",header=0)
import json
import geojson
with open("hksar_18_district_boundary.json","r") as f:
    shape = json.loads(f.read())
features = shape['features']
polygon = []
for dict_ in features:
    # First coordinate ring of each district geometry (outer boundary only;
    # any holes/inner rings are ignored).
    polygon.append(dict_['geometry']['coordinates'][0])
from shapely.geometry import Polygon, Point, MultiPolygon
# One shapely Polygon per district, in file order (index = district id).
poly_list=[]
for p in polygon:
    poly = Polygon(p)
    poly_list.append(poly)
In [15]:
import ast

# Assign each area to the district polygon containing its coordinates.
for i in range(Area.shape[0]):
    # The location string depends only on the row, not on the polygon, so
    # parse it once per row instead of once per (row, polygon) pair.
    try:
        loca = ast.literal_eval(Area.loc[i].loc["location"])
        point = Point(loca['lng'], loca['lat'])  # shapely Point is (x=lng, y=lat)
    except (ValueError, SyntaxError, KeyError, TypeError):
        # Narrowed from a bare except: missing or unparseable location —
        # leave poly_num unset (NaN) so dropna() removes the row later.
        continue
    for p in range(len(poly_list)):
        if poly_list[p].contains(point):
            Area.loc[i, "poly_num"] = p
In [16]:
Area = Area.dropna()
# Total transaction frequency per district polygon.
Transaction_Frequency = pd.DataFrame(Area.groupby("poly_num").sum())
# NOTE(review): presumably district 12 had no mapped transactions, so it is
# added with 0 to cover all 18 districts on the map — confirm.
Transaction_Frequency.loc[12] = 0

# Plain Series keyed by poly_num, as folium's choropleth expects.
Transaction_Frequency_ = pd.Series(list(Transaction_Frequency["Transaction Frequency"]),index=Transaction_Frequency.index)

# Rank districts 0-17 by ascending frequency (used for the rank-based map).
TFasc = Transaction_Frequency.sort_values(by="Transaction Frequency")
TFasc_ = pd.Series(range(18),index=TFasc.index)
In [17]:
m = folium.Map(location = [22.28056,114.17222],zoom_start=10)
In [18]:
def process_coord(poly):
    """Convert a sequence of coordinate tuples into a list of [x, y] lists
    (GeoJSON requires lists, not tuples)."""
    return [list(coord) for coord in poly]
In [19]:
def createGeoJsonObject(poly_list):
    """Build a GeoJSON FeatureCollection dict from shapely-style polygons.

    Each polygon becomes one Polygon feature whose 'zone_id' property is its
    index in poly_list; the choropleth cells join on that id via
    'feature.properties.zone_id'.
    """
    collection = {'type': 'FeatureCollection', 'features': []}
    for zone_id, poly in enumerate(poly_list):
        # GeoJSON needs [x, y] lists rather than coordinate tuples.
        ring = [list(coord) for coord in poly.exterior.coords]
        feature = {
            'geometry': {'coordinates': [ring], 'type': 'Polygon'},
            'properties': {'zone_id': zone_id},
            'type': 'Feature',
        }
        collection['features'].append(feature)
    return collection
zone_data_dict=createGeoJsonObject(poly_list)
In [20]:
# Deprecation fix: Map.choropleth raises a FutureWarning; the warning itself
# recommends the folium.Choropleth class, which takes the same arguments.
folium.Choropleth(geo_data=json.dumps(zone_data_dict), data=Transaction_Frequency_,
                  key_on='feature.properties.zone_id',
                  fill_color='YlOrRd', fill_opacity=0.3, line_opacity=0.4).add_to(m)
folium.LayerControl().add_to(m)
m
/Users/liushujun/anaconda3/lib/python3.6/site-packages/folium/folium.py:426: FutureWarning:

The choropleth  method has been deprecated. Instead use the new Choropleth class, which has the same arguments. See the example notebook 'GeoJSON_and_choropleth' for how to do this.

Out[20]:
In [21]:
n = folium.Map(location = [22.28056,114.17222],zoom_start=10)
n.choropleth(geo_data=json.dumps(zone_data_dict),data=TFasc_,
             key_on='feature.properties.zone_id',
             fill_color='YlOrRd',fill_opacity=0.3,line_opacity=0.4)
folium.LayerControl().add_to(n)
n
Out[21]:

Modeling

Regression

In [84]:
# Fix the RNG so the 75/25 train/test split is reproducible.
seed = 1
np.random.seed(seed)
Y = tra_HK['paycommission_current (SUM)']
feature_cols = ['Miles (SUM)', 'Promotion Miles (SUM)', 'Voucher Miles (SUM)',
                'paid hour', 'paid_day_of_week']
X = tra_HK[feature_cols]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25)
In [85]:
regr = linear_model.LinearRegression()
regr.fit(X_train, Y_train)
Out[85]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)
In [86]:
Y_pred=regr.predict(X_test)
dY=(Y_test-Y_pred)
np.dot(dY.T,dY)/len(dY)
Out[86]:
695.1612394830973
normalization
In [87]:
from sklearn import preprocessing
X_normalized = preprocessing.normalize(X, norm='l2')
Y=Y.to_frame(name=None)
Y_normalized = preprocessing.normalize(Y,norm='l2')
In [88]:
# Refit the linear model on the L2-normalized features and target.
np.random.seed(seed)
X = tra_HK[['Miles (SUM)', 'Promotion Miles (SUM)', 'Voucher Miles (SUM)',
            'paid hour', 'paid_day_of_week']]
X_train, X_test, Y_train, Y_test = train_test_split(X_normalized, Y_normalized,
                                                    test_size=0.25)
regr = linear_model.LinearRegression()
regr.fit(X_train, Y_train)
Y_pred = regr.predict(X_test)
dY = Y_test - Y_pred
np.dot(dY.T, dY) / len(dY)  # test-set MSE
Out[88]:
array([[0.01847482]])

Decision Trees

In [91]:
def categorize_Y(pc):
    """Bucket a paycommission value into one of four categories.

    The cut points 16.2 / 36.215 / 61.35 are close to (but not exactly) the
    quartiles of paycommission_current shown in describe() — confirm their
    origin. Note: values <= 16.2 (and NaN, since all comparisons fail) fall
    through to category 4.0; this ordering is preserved from the original —
    verify it is intended.
    """
    if 16.2 < pc <= 36.215:
        return 1.0
    if 36.215 < pc <= 61.35:
        return 2.0
    if pc > 61.35:
        return 3.0
    return 4.0
y_category = pd.Series(Y.values).apply(categorize_Y)
In [90]:
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculationv
In [168]:
np.random.seed(seed)
X_train,X_test,Y_train,Y_test=train_test_split(X,y_category,test_size=0.25)
feature_cols = ['Miles (SUM)', 'Promotion Miles (SUM)', 'Voucher Miles (SUM)', 'paid hour','paid_day_of_week'] 
In [169]:
lab_enc = preprocessing.LabelEncoder()
# Rank-encode each feature column (fit_transform refits per column, exactly
# as the original five separate calls did). Assigning through .loc avoids the
# chained-assignment SettingWithCopyWarning the original raised.
for col in ['Miles (SUM)', 'Promotion Miles (SUM)', 'Voucher Miles (SUM)',
            'paid hour', 'paid_day_of_week']:
    X_train.loc[:, col] = lab_enc.fit_transform(X_train[col])
# Bug fix: the original fit a *fresh* encoder on Y_test, so train and test
# label codes could disagree whenever their class sets differed. Fit the
# encoder once on the training labels and reuse that mapping for the test set.
# NOTE(review): X_test is never encoded here although the model is trained on
# encoded X_train — left unchanged to preserve downstream results, but this
# looks like a modeling flaw worth revisiting.
y_enc = preprocessing.LabelEncoder()
Y_train = y_enc.fit_transform(Y_train)
Y_test = y_enc.transform(Y_test)
/Users/jizngziwei/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

/Users/jizngziwei/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:3: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

/Users/jizngziwei/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:4: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

/Users/jizngziwei/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:5: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

/Users/jizngziwei/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:6: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

In [163]:
# Fit an unpruned decision tree and predict the held-out categories.
clf = DecisionTreeClassifier()
clf = clf.fit(X_train,Y_train)
y_pred = clf.predict(X_test)
y_pred
Out[163]:
array([2, 3, 2, ..., 1, 2, 2])
In [164]:
# Test-set accuracy of the unpruned tree (observed ~= 0.70 in Out[164];
# the original note claiming 1.5% did not match the printed output).
print("Accuracy:",metrics.accuracy_score(Y_test, y_pred))
Accuracy: 0.6976315789473684
In [165]:
# Prune the tree: entropy split criterion with depth capped at 3.
clf = DecisionTreeClassifier(criterion="entropy", max_depth=3)
clf = clf.fit(X_train,Y_train)
y_pred = clf.predict(X_test)
print("Accuracy:",metrics.accuracy_score(Y_test, y_pred))
# Accuracy improves with pruning (observed ~0.82 vs ~0.70 unpruned;
# the original note claiming 2.5% did not match the printed output).
Accuracy: 0.8160526315789474

RandomForest

In [172]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import label_binarize
In [176]:
# Random forest baseline with default hyperparameters (10 trees in this
# sklearn version — see the FutureWarning below about the changing default).
rfc = RandomForestClassifier()
model = rfc.fit(X_train, Y_train)
y_pred = model .predict(X_test)
print("Accuracy:",metrics.accuracy_score(Y_test, y_pred))
Accuracy: 0.7763157894736842
/Users/jizngziwei/anaconda3/lib/python3.6/site-packages/sklearn/ensemble/forest.py:246: FutureWarning:

The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.

Campaign Analysis (A/B Testing)

Web scraping

In [22]:
#con_data['En Name (Products Producer)']=control.index
#ccon_data['location']= control['En Name (Products Producer)'].apply(lambda x: get_location(x))
# with open('control group with location', 'a+') as f:        
#     control.to_csv(f, header=False)
location = pd.read_excel("Control group with location.xlsx",index_col=0)
location = location.dropna()

ANA Activity (Airline-Specific)

In [23]:
##control group
data=tra_HK[tra_HK['Name (Promotion Multiplepromotion)'].isnull()]
con_data=data.groupby('En Name (Products Producer)').mean()
In [24]:
def difference(df):
    """Compare each restaurant's mean revenue under a promotion against the
    no-promotion control group (`con_data`, built in an earlier cell).

    Parameters
    ----------
    df : pandas.DataFrame
        Indexed by restaurant name, with a mean 'paycommission_current (SUM)'
        column (the output of a groupby(...).mean() on the promotion subset).

    Returns
    -------
    pandas.DataFrame
        Indexed by restaurant, with columns 'Actual Revenue Difference' and
        'Actual Revenue Change in %'. Restaurants absent from the control
        group are skipped.
    """
    paycommission_diff_list = []
    pct_paycommission_diff_list = []
    new_index = []
    for i in df.index:
        try:
            paycommission = df.loc[i].loc["paycommission_current (SUM)"]
            con_paycommission = con_data.loc[i].loc["paycommission_current (SUM)"]
        except KeyError:
            # Narrowed from a bare except: restaurant has no control-group
            # baseline, so there is nothing to compare against.
            continue
        paycommission_diff = paycommission - con_paycommission
        # NOTE(review): con_paycommission == 0 yields inf/nan here — that is
        # the source of the RuntimeWarnings printed by later cells; such rows
        # are removed downstream by dropna().
        pct_paycommission_diff = (paycommission_diff / con_paycommission) * 100
        paycommission_diff_list.append(paycommission_diff)
        pct_paycommission_diff_list.append(pct_paycommission_diff)
        new_index.append(i)
    new_df = pd.DataFrame([paycommission_diff_list, pct_paycommission_diff_list]).T
    new_df.columns = ["Actual Revenue Difference", "Actual Revenue Change in %"]
    new_df.index = new_index
    return new_df
In [25]:
#September 3X ANA miles
data10=tra_HK.loc[(tra_HK['Name (Promotion Multiplepromotion)']=='全场3倍全日空里程累积')|(tra_HK['Name (Promotion Multiplepromotion)']=='3倍全日空里數累積')|(tra_HK['Name (Promotion Multiplepromotion)']=='September 3X ANA miles')]
data10=data10.groupby('En Name (Products Producer)').mean()
In [26]:
result1 = difference(data10)
In [27]:
import json
import geojson
with open("hksar_18_district_boundary.json","r") as f:
    shape = json.loads(f.read())
In [28]:
features = shape['features']
polygon = []
for dict_ in features:
    polygon.append(dict_['geometry']['coordinates'][0])
In [29]:
#!pip install pyshp
from shapely.geometry import Polygon, Point, MultiPolygon
poly_list=[]
for shape in polygon:
    poly = Polygon(shape)
    poly_list.append(poly)
In [30]:
result1.loc[:,"poly_num"] = 0
result1 = result1.dropna()
import ast
# Map each restaurant to the district polygon containing its location.
for i in result1.index:
    # The location string depends only on the restaurant, so parse it once
    # per restaurant instead of once per (restaurant, polygon) pair.
    try:
        loca = ast.literal_eval(location.loc[i].loc["location"])
        point = Point(loca['lng'], loca['lat'])  # shapely Point is (x=lng, y=lat)
    except (KeyError, ValueError, SyntaxError, TypeError):
        # Narrowed from a bare except: missing or unparseable location —
        # leave the default poly_num = 0.
        continue
    for p in range(len(poly_list)):
        if poly_list[p].contains(point):
            result1.loc[i, "poly_num"] = p
In [31]:
mean_effect1 = pd.DataFrame(result1.groupby("poly_num").mean())
mean_effect1.loc[17,:] = 0
mean_effect1.loc[11,:] = 0
mean_effect1.loc[12,:] = 0
mean_effect1 = mean_effect1.sort_values("poly_num")
mean_effect_1 = pd.Series(list(mean_effect1["Actual Revenue Difference"]),index=mean_effect1.index)
In [32]:
measc1 = mean_effect1.sort_values(by="Actual Revenue Difference")
measc_1 = pd.Series(range(18),index=measc1.index)
print(measc1['Actual Revenue Difference'].mean())
print(measc1['Actual Revenue Difference'].sum())
4.848012350961863
87.26422231731354
In [33]:
def process_coord(poly):
    new_poly = []
    for coord in poly:
        new_poly.append(list(coord))
    return new_poly
In [34]:
zone_data_dict = createGeoJsonObject(poly_list)
In [35]:
m = folium.Map(location = [22.28056,114.17222],zoom_start=10)
m.choropleth(geo_data=json.dumps(zone_data_dict),data=mean_effect_1,
             key_on='feature.properties.zone_id',
             fill_color='YlOrRd',fill_opacity=0.3,line_opacity=0.4)
folium.LayerControl().add_to(m)
m
Out[35]:
In [36]:
mm = folium.Map(location = [22.28056,114.17222],zoom_start=10)
mm.choropleth(geo_data=json.dumps(zone_data_dict),data=measc_1,
             key_on='feature.properties.zone_id',
             fill_color='YlOrRd',fill_opacity=0.3,line_opacity=0.4)
folium.LayerControl().add_to(mm)
mm
Out[36]:

Traditional Japanese Taste 2X Miles (Restaurant-Specific)

In [37]:
data1=tra_HK.loc[(tra_HK['Name (Promotion Multiplepromotion)']=='爱恋和风滋味 2倍里程')|(tra_HK['Name (Promotion Multiplepromotion)']=='愛戀和風滋味 2倍里數獎賞')|(tra_HK['Name (Promotion Multiplepromotion)']=='Traditional Japanese Taste 2x Bonus Miles')]
data1=data1.groupby('En Name (Products Producer)').mean()
In [38]:
result2 = difference(data1)
result2.loc[:,"poly_num"] = 0
result2 = result2.dropna()
import ast
for i in result2.index:
    for p in range(len(poly_list)):
        try:
            loca = ast.literal_eval(location.loc[i].loc["location"])           
            x = loca['lat']
            y = loca['lng']
            point = Point(y, x)
            if poly_list[p].contains(point):
                result2.loc[i,"poly_num"] = p
        except:
            pass
In [39]:
mean_effect2 = pd.DataFrame(result2.groupby("poly_num").mean())
mean_effect2.loc[1,:] = 0
mean_effect2.loc[2,:] = 0
mean_effect2.loc[3,:] = 0
mean_effect2.loc[4,:] = 0
mean_effect2.loc[5,:] = 0
mean_effect2.loc[6,:] = 0
mean_effect2.loc[7,:] = 0
mean_effect2.loc[8,:] = 0
mean_effect2.loc[9,:] = 0
mean_effect2.loc[12,:] = 0
mean_effect2.loc[13,:] = 0
mean_effect2.loc[14,:] = 0
mean_effect2.loc[15,:] = 0
mean_effect2.loc[16,:] = 0
mean_effect2.loc[17,:] = 0
mean_effect2 = mean_effect2.sort_values("poly_num")
mean_effect_2 = pd.Series(list(mean_effect2["Actual Revenue Difference"]),index=mean_effect2.index)
In [40]:
measc2 = mean_effect2.sort_values(by="Actual Revenue Difference")
measc_2= pd.Series(range(18),index=measc2.index)
print(measc2['Actual Revenue Difference'].mean())
print(measc2['Actual Revenue Difference'].sum())
2.335237794612794
42.03428030303029
In [41]:
m1 = folium.Map(location = [22.28056,114.17222],zoom_start=10)
m1.choropleth(geo_data=json.dumps(zone_data_dict),data=mean_effect_2,
             key_on='feature.properties.zone_id',
             fill_color='YlOrRd',fill_opacity=0.3,line_opacity=0.4)
folium.LayerControl().add_to(m1)
m1
Out[41]:
In [42]:
mm1 = folium.Map(location = [22.28056,114.17222],zoom_start=10)
mm1.choropleth(geo_data=json.dumps(zone_data_dict),data=measc_2,
             key_on='feature.properties.zone_id',
             fill_color='YlOrRd',fill_opacity=0.3,line_opacity=0.4)
folium.LayerControl().add_to(mm1)
mm1
Out[42]:

'Dah Sing 2.5X Avios' (Card-Specific)

In [43]:
data2=tra_HK.loc[(tra_HK['Name (Promotion Multiplepromotion)']=='Dah Sing 2.5X Avios')|(tra_HK['Name (Promotion Multiplepromotion)']=='大新信用卡2.5倍Avios')|(tra_HK['Name (Promotion Multiplepromotion)']=='2.5X Avios (Dah Sing Credit Card)')]
data2=data2.groupby('En Name (Products Producer)').mean()
In [44]:
result3 = difference(data2)
result3.loc[:,"poly_num"] = 0
result3 = result3.dropna()
import ast
for i in result3.index:
    for p in range(len(poly_list)):
        try:
            loca = ast.literal_eval(location.loc[i].loc["location"])           
            x = loca['lat']
            y = loca['lng']
            point = Point(y, x)
            if poly_list[p].contains(point):
                result3.loc[i,"poly_num"] = p
        except:
            pass
/Users/liushujun/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:14: RuntimeWarning:

divide by zero encountered in double_scalars

/Users/liushujun/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:14: RuntimeWarning:

invalid value encountered in double_scalars

In [45]:
mean_effect3 = pd.DataFrame(result3.groupby("poly_num").mean())
mean_effect3.loc[17,:] = 0
mean_effect3.loc[13,:] = 0
mean_effect3.loc[12,:] = 0
mean_effect3 = mean_effect3.sort_values("poly_num")
mean_effect_3 = pd.Series(list(mean_effect3["Actual Revenue Difference"]),index=mean_effect3.index)
In [46]:
measc3 = mean_effect3.sort_values(by="Actual Revenue Difference")
measc_3 = pd.Series(range(18),index=measc3.index)
print(measc3['Actual Revenue Difference'].mean())
print(measc3['Actual Revenue Difference'].sum())
7.79655847443443
140.33805253981973
In [47]:
m2 = folium.Map(location = [22.28056,114.17222],zoom_start=10)
m2.choropleth(geo_data=json.dumps(zone_data_dict),data=mean_effect_3,
             key_on='feature.properties.zone_id',
             fill_color='YlOrRd',fill_opacity=0.3,line_opacity=0.4)
folium.LayerControl().add_to(m2)
m2
Out[47]:
In [48]:
mm2 = folium.Map(location = [22.28056,114.17222],zoom_start=10)
mm2.choropleth(geo_data=json.dumps(zone_data_dict),data=measc_3,
             key_on='feature.properties.zone_id',
             fill_color='YlOrRd',fill_opacity=0.3,line_opacity=0.4)
folium.LayerControl().add_to(mm2)
mm2
Out[48]:

"11.11 You're Not Alone 2x Bonus Miles" (Festival-Specific)

In [49]:
data3=tra_HK.loc[(tra_HK['Name (Promotion Multiplepromotion)']=='11.11 光棍不寂寞 2倍里數')|(tra_HK['Name (Promotion Multiplepromotion)']=='11.11 光棍节2倍里程')|(tra_HK['Name (Promotion Multiplepromotion)']=="11.11 You're Not Alone 2x Bonus Miles")]
data3=data3.groupby('En Name (Products Producer)').mean()
In [50]:
result4 = difference(data3)
result4.loc[:,"poly_num"] = 0
result4 = result4.dropna()
import ast
for i in result4.index:
    for p in range(len(poly_list)):
        try:
            loca = ast.literal_eval(location.loc[i].loc["location"])           
            x = loca['lat']
            y = loca['lng']
            point = Point(y, x)
            if poly_list[p].contains(point):
                result4.loc[i,"poly_num"] = p
        except:
            pass
In [51]:
mean_effect4 = pd.DataFrame(result4.groupby("poly_num").mean())
mean_effect4.loc[3,:] = 0
mean_effect4.loc[8,:] = 0
mean_effect4.loc[10,:] = 0
mean_effect4.loc[11,:] = 0
mean_effect4.loc[12,:] = 0
mean_effect4.loc[13,:] = 0
mean_effect4.loc[15,:] = 0
mean_effect4.loc[16,:] = 0
mean_effect4.loc[17,:] = 0
mean_effect4 = mean_effect4.sort_values("poly_num")
mean_effect_4 = pd.Series(list(mean_effect4["Actual Revenue Difference"]),index=mean_effect4.index)
In [52]:
measc4 = mean_effect4.sort_values(by="Actual Revenue Difference")
measc_4 = pd.Series(range(18),index=measc4.index)
print(measc4["Actual Revenue Difference"].mean())
print(measc4["Actual Revenue Difference"].sum())
11.598240047355922
208.76832085240662
In [53]:
m3 = folium.Map(location = [22.28056,114.17222],zoom_start=10)
m3.choropleth(geo_data=json.dumps(zone_data_dict),data=mean_effect_4,
             key_on='feature.properties.zone_id',
             fill_color='YlOrRd',fill_opacity=0.3,line_opacity=0.4)
folium.LayerControl().add_to(m3)
m3
Out[53]:
In [54]:
mm3 = folium.Map(location = [22.28056,114.17222],zoom_start=10)
mm3.choropleth(geo_data=json.dumps(zone_data_dict),data=measc_4,
             key_on='feature.properties.zone_id',
             fill_color='YlOrRd',fill_opacity=0.3,line_opacity=0.4)
folium.LayerControl().add_to(mm3)
mm3
Out[54]:

'2X Avios' (Different Bonus Times Analysis)

In [55]:
data4=tra_HK.loc[(tra_HK['Name (Promotion Multiplepromotion)']=='2X Avios')|(tra_HK['Name (Promotion Multiplepromotion)']=='全場2倍Avios')]
data4=data4.groupby('En Name (Products Producer)').mean()
In [56]:
result5 = difference(data4)
result5.loc[:,"poly_num"] = 0
result5 = result5.dropna()
import ast
for i in result5.index:
    for p in range(len(poly_list)):
        try:
            loca = ast.literal_eval(location.loc[i].loc["location"])           
            x = loca['lat']
            y = loca['lng']
            point = Point(y, x)
            if poly_list[p].contains(point):
                result5.loc[i,"poly_num"] = p
        except:
            pass
/Users/liushujun/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:14: RuntimeWarning:

invalid value encountered in double_scalars

In [57]:
mean_effect5 = pd.DataFrame(result5.groupby("poly_num").mean())
mean_effect5.loc[3,:]=0
mean_effect5.loc[10,:]=0
mean_effect5.loc[12,:]=0
mean_effect5.loc[13,:]=0
mean_effect5.loc[15,:]=0
mean_effect5.loc[16,:]=0
mean_effect5.loc[17,:]=0
mean_effect5 = mean_effect5.sort_values("poly_num")
mean_effect_5 = pd.Series(list(mean_effect5["Actual Revenue Difference"]),index=mean_effect5.index)
In [58]:
difference_in_times = mean_effect_5 - mean_effect_3
print(difference_in_times.mean())
print(difference_in_times.sum())
0.6358276028203943
11.444896850767098
In [59]:
m5 = folium.Map(location = [22.28056,114.17222],zoom_start=10)
m5.choropleth(geo_data=json.dumps(zone_data_dict),data=difference_in_times,
             key_on='feature.properties.zone_id',
             fill_color='YlOrRd',fill_opacity=0.5,line_opacity=0.4)
folium.LayerControl().add_to(m5)
m5
Out[59]:
In [60]:
measc5 = difference_in_times.sort_values()
measc_5 = pd.Series(range(18),index=measc5.index)

mm5 = folium.Map(location = [22.28056,114.17222],zoom_start=10)
mm5.choropleth(geo_data=json.dumps(zone_data_dict),data=measc_5,
             key_on='feature.properties.zone_id',
             fill_color='YlOrRd',fill_opacity=0.3,line_opacity=0.4)
folium.LayerControl().add_to(mm5)
mm5
Out[60]:
In [ ]:
 
In [ ]: